# This script calculates various quantities reported in the main text
# which are not reported in the figures or tables. For each quantity,
# the relevant quotation from the text is provided. All the numbers refer to 
# the revised version: Shirikov_trust_propaganda_revised_Sep_2023.pdf.


# introduction ----
# "large-scale online experiment (𝑛 ≈ 22,400)" ------

# Session ids of respondents found in main_stories who have a non-missing
# presidential approval value
resp_non_missing_pa <- main_resp |>
  filter(
    session_id %in% main_stories$session_id,
    !is.na(pres_approval_dummy)
  ) |>
  pull(session_id)

# Same selection, but requiring a non-missing value for pride in the
# Crimea annexation
resp_non_missing_crimea <- main_resp |>
  filter(
    session_id %in% main_stories$session_id,
    !is.na(pride_history_crimea)
  ) |>
  pull(session_id)

# Same selection, but requiring non-missing beliefs about EU and Ukraine
resp_non_missing_eu_ukr <- main_resp |>
  filter(
    session_id %in% main_stories$session_id,
    !is.na(EU_Ukr_beliefs)
  ) |>
  pull(session_id)

# Union of the three sets; the final count keeps only the ids that also
# occur in main_stories_data
resp_total <- unique(c(
  resp_non_missing_pa,
  resp_non_missing_crimea,
  resp_non_missing_eu_ukr
))
resp_total <- sum(resp_total %in% main_stories_data$session_id)

print(paste("Main study N:", resp_total))

# "nationally representative sample (𝑛 ≈ 1,600)" ------
print(paste("Nationally representative sample N:", nrow(levada_resp)))

# "large survey on a representative online sample (𝑛 ≈ 2,200)" ------
print(paste("OMI study N:", nrow(omi_resp)))
# rows of omi_resp that remain once missing presidential approval is removed
# (the sample used in analyses conditional on presidential approval)
print(paste(
  "OMI study N (with non-missing presidential approval):",
  omi_resp |>
    filter(!is.na(pres_approval_dummy)) |>
    nrow()
))

# Section 2.1 -------- 
# "The quiz was completed by 23,179 respondents" --------
# Number of rows in main_resp whose session id occurs in main_stories_data
all_quiz_respondents <- main_resp |>
  filter(session_id %in% main_stories_data$session_id) |>
  nrow()
print(paste("Main study, quiz taken by:", all_quiz_respondents))

# "13 percent were not asked about presidential approval" ------
# Quiz takers for whom pres_approval_asked equals 0
resp_pa_not_asked <- main_resp |>
  filter(
    session_id %in% main_stories_data$session_id &
      pres_approval_asked == 0
  ) |>
  nrow()
print(paste("Presidential approval not asked:", resp_pa_not_asked))
# share relative to everyone who took the quiz
print(paste(
  "Percent of the sample:",
  round(100 * resp_pa_not_asked / all_quiz_respondents, 2)
))

# "In the remaining sample, 8 percent did not report their approval" ------
# "Remaining sample": all quiz takers minus those not asked about approval
resp_pa_asked <- all_quiz_respondents - resp_pa_not_asked

# Quiz takers who were asked (pres_approval_asked == 1) but have a missing
# pres_approval_dummy
resp_pa_asked_missing <- main_resp |>
  filter(
    session_id %in% main_stories_data$session_id,
    pres_approval_asked == 1,
    is.na(pres_approval_dummy)
  ) |>
  nrow()
percent_resp_pa_asked_missing <- resp_pa_asked_missing / resp_pa_asked

print(paste(
  "Respondents with missing presidential approval:",
  resp_pa_asked_missing
))
print(paste(
  "Percent of the sample:",
  round(100 * percent_resp_pa_asked_missing, 1)
))

# "About 13 percent did not answer questions about their age, gender, or education" -----
# Quiz takers asked about approval for whom at least one of gender, age group
# or education is missing
resp_demog_missing <- main_resp |>
  filter(
    session_id %in% main_stories_data$session_id,
    pres_approval_asked == 1,
    is.na(female) | is.na(age_group) | is.na(education)
  ) |>
  nrow()
# denominator: respondents who were asked about presidential approval
percent_resp_demog_missing <- resp_demog_missing / resp_pa_asked

print(paste(
  "Respondents with missing gender/age/education:",
  resp_demog_missing
))
print(paste(
  "Percent of the sample:",
  round(100 * percent_resp_demog_missing, 1)
))

# "respondents who indicated having taken the quiz earlier (3 percent)" ------
# Quiz takers asked about approval whose `taken` flag equals 1
resp_taken_earlier <- main_resp |>
  filter(
    session_id %in% main_stories_data$session_id,
    pres_approval_asked == 1,
    taken == 1
  ) |>
  nrow()
# denominator: respondents who were asked about presidential approval
percent_resp_taken_earlier <- resp_taken_earlier / resp_pa_asked
print(paste(
  "Respondents who indicated taking the quiz earlier:",
  resp_taken_earlier
))
print(paste(
  "Percent of the sample:",
  round(100 * percent_resp_taken_earlier, 1)
))

# "Such irregular responses amounted to less than 2 percent of the data" ------
# Story-level sample after removing rows with missing approval and rows from
# respondents who reported taking the quiz earlier (taken == 1); rows with a
# missing `taken` value are kept
data_non_missing_pa_not_taken_before <- main_stories_data |>
  drop_na(pres_approval_dummy) |>
  filter(taken == 0 | is.na(taken))

# "irregular" responses: straightliners and unrealistically fast responses.
# Counted within the sample defined above, so the numerator and the
# denominator of the percentage always share the same sample definition.
irregular_responses <- data_non_missing_pa_not_taken_before |>
  filter(all_labeled_fake == 1 | all_labeled_true == 1 | story_time <= 1) |>
  nrow()

print(paste(
  "Percent of 'irregular' responses:",
  round(
    100 * irregular_responses / nrow(data_non_missing_pa_not_taken_before),
    2
  )
))

# "includes 306,801 decisions on the truthfulness of news messages..." --------
# rows of main_stories with non-missing presidential approval
print(paste(
  "Decisions in the resulting data set:",
  nrow(filter(main_stories, !is.na(pres_approval_dummy)))
))

# "made by 17,974 respondents" ------
# distinct session ids among those rows
print(paste(
  "Respondents in the resulting data set:",
  main_stories |>
    filter(!is.na(pres_approval_dummy)) |>
    pull(session_id) |>
    unique() |>
    length()
))

# For reference: the sample size in column 1 of Table B4 ("N Clusters: 17961")
# is reproduced by additionally restricting to the story evaluations that
# enter the main analysis of the experiment (story codes 104-114 and
# 5221-6162).
print(paste(
  "Respondents in the regression for the experiment:",
  main_stories |>
    filter(
      !is.na(pres_approval_dummy),
      story_code %in% c(104:114, 5221:6162)
    ) |>
    pull(session_id) |>
    unique() |>
    length()
))

# "15 pro-Russia messages and 11 critical messages" -------
# Number of stories per story_direction across all of main_stories_info.
# Printed explicitly, like every other quantity in this script, so the table
# also appears when the file is run through source(), which does not
# auto-print top-level values by default.
print(
  main_stories_info |>
    group_by(story_direction) |>
    summarise(n = n())
)

# "9 pro-Russia, 5 critical, and 17 neutral stories" ------
# Same tabulation, restricted to story codes 104-114 and 5221-6162
print(
  main_stories_info |>
    filter(story_code %in% c(104:114, 5221:6162)) |>
    group_by(story_direction) |>
    summarise(n = n())
)

# Section 3.4 ------
# "58 percent of supporters admitted that Channel One was not independent" -----
# Count of supporters (pres_approval_dummy == 1) who rated Channel One as
# "Not independent"
sup_tv1_not_indep <- omi_resp |>
  filter(
    pres_approval_dummy == 1,
    ranking_independence_TV1 == "Not independent"
  ) |>
  nrow()
# Count of all supporters; the denominator for supporter-level percentages
sup_total <- nrow(filter(omi_resp, pres_approval_dummy == 1))

print(paste(
  "Percent of supporters who found Channel One to be not independent:",
  round(100 * sup_tv1_not_indep / sup_total, 1)
))

# "49 percent of those who recognized this lack of independence..." ------
# Count of supporters who rated Channel One "Not independent" on independence
# and "Mostly yes" on accuracy
sup_tv1_not_indep_accurate <- omi_resp |>
  filter(
    pres_approval_dummy == 1,
    ranking_independence_TV1 == "Not independent",
    ranking_accuracy_TV1 == "Mostly yes"
  ) |>
  nrow()

# share among supporters who rated Channel One as not independent
print(paste(
  "Percent of supporters who found Channel One to be not independent but accurate:",
  round(100 * sup_tv1_not_indep_accurate / sup_tv1_not_indep, 1)
))

# "34 percent of them listed this station among trusted news outlets" -----
# N of supporters who thought Channel One to be not independent but still
# listed it among trusted outlets (source_trusted_TV1 == 1)
sup_tv1_not_indep_trusted <- omi_resp |>
  filter(pres_approval_dummy == 1 &
           ranking_independence_TV1 == "Not independent" &
           source_trusted_TV1 == 1) |>
  nrow()

# Share among supporters who rated Channel One as not independent.
# The label says "trusted": this quantity is about trust, not accuracy.
print(paste("Percent of supporters who found Channel One to be not independent but trusted:",
            round(100*sup_tv1_not_indep_trusted/sup_tv1_not_indep, 1)))

# "almost 60 percent of Putin supporters reported knowledge of some ------
# independent news organizations"
# Count of supporters with source_known_independent == 1
sup_indep_known <- omi_resp |>
  filter(
    pres_approval_dummy == 1,
    source_known_independent == 1
  ) |>
  nrow()

# share among all supporters
print(paste(
  "Percent of supporters who knew any independent outlets:",
  round(100 * sup_indep_known / sup_total, 1)
))

# "Among pro-Putin respondents who found Channel One accurate and truthful, 
# 6.2 percent reported trusting at least one independent news outlet" ------
# Count of supporters who rated Channel One's accuracy as "Mostly yes"
sup_tv1_accurate <- omi_resp |>
  filter(
    pres_approval_dummy == 1,
    ranking_accuracy_TV1 == "Mostly yes"
  ) |>
  nrow()
# Of those, the count with source_trusted_independent == 1
sup_tv1_accurate_trust_indep <- omi_resp |>
  filter(
    pres_approval_dummy == 1,
    ranking_accuracy_TV1 == "Mostly yes",
    source_trusted_independent == 1
  ) |>
  nrow()

print(paste(
  "Percent of supporters who found Channel One accurate and trusted any independent outlets:",
  round(100 * sup_tv1_accurate_trust_indep / sup_tv1_accurate, 1)
))

# "among supporters who admitted that Channel One often publishes false information,
# this proportion was 10.3 percent" ------
# Count of supporters who rated Channel One's accuracy as
# "Often gives false info"
sup_tv1_false <- omi_resp |>
  filter(
    pres_approval_dummy == 1,
    ranking_accuracy_TV1 == "Often gives false info"
  ) |>
  nrow()
# Of those, the count with source_trusted_independent == 1
sup_tv1_false_trust_indep <- omi_resp |>
  filter(
    pres_approval_dummy == 1,
    ranking_accuracy_TV1 == "Often gives false info",
    source_trusted_independent == 1
  ) |>
  nrow()
print(paste(
  "Percent of supporters who found Channel One inaccurate and trusted any independent outlets:",
  round(100 * sup_tv1_false_trust_indep / sup_tv1_false, 1)
))


